# Replication script for
#
# "Political Divisions in Large Cities:
# The Socio-Spatial Basis of Legislative Behavior in Chicago and Toronto"
# Journal of Politics
#
# CHICAGO ANALYSIS
#
# November 2, 2023
#
# Zack Taylor, University of Western Ontario, zack.taylor@uwo.ca
# David A. Armstrong II, University of Western Ontario, dave.armstrong@uwo.ca

library(tibble)
library(dplyr)  
library(purrr)
library(ggplot2)
library(tidyr)
library(stringr)
library(corrr)
library(rio)
library(readstata13)
library(cowplot)
library(scales)
library(progress)
library(emIRT)
library(sf)
library(sp)
library(spdep)
library(ggrepel)
library(ggridges)
library(ggpubr)
library(pscl)
library(wnominate)
library(dwnominate)

# legR requires prior installation of the h2o library
# https://docs.h2o.ai/h2o/latest-stable/h2o-docs/downloading.html
library(h2o)

# the legR package must be downloaded from GitHub
library(remotes)
remotes::install_github(repo = "davidaarmstrong/legr",
                        upgrade = "always")
library(legR)

# SET UP CONSOLE LOG FILE ----

chicago_log <- file("chicago_log.txt")
sink(chicago_log, append = TRUE, type = "output") # Writing console output to log file
sink(chicago_log, append = TRUE, type = "message")
cat(readChar(rstudioapi::getSourceEditorContext()$path, # Writing currently opened R script to file
             file.info(rstudioapi::getSourceEditorContext()$path)$size))

# GENERAL ----

seed <- 3094 
set.seed(seed)
sims = 100 # Set number of simulations for Appendix E
seeds <- round(runif(sims*2, 1, 10000)) # Set seeds for Appendix E

# plot theme for Appendix D
theme_map <- function(base_size = 12) {
  theme_grey(base_size) %+replace%
    theme(
      panel.background = element_rect(fill = "transparent",colour = NA),
      panel.grid.minor = element_blank(),
      panel.grid.major = element_blank(),
      plot.background = element_rect(fill = "transparent",colour = NA),
      axis.ticks = element_blank(),
      axis.text = element_blank(),
      plot.margin = unit(c(0,0,0,0), unit = "pt", "lines"),
      legend.background = element_rect(fill = "transparent", color = NA),
      legend.key = element_rect(fill = "black", color = NA),
      legend.text = element_text(color = "black"),
      legend.title = element_text(color = "black"),
      legend.box = "vertical",
      legend.position = "right",
      complete = TRUE
    )}

# transpose function
trans <- function(m){
  as_tibble(t(m)) %>% 
    mutate(delta = colnames(m)) %>% 
    separate(delta, into=c("start", "end")) %>% 
    mutate(across(c("start", "end"), as.numeric))
}

# create figures object
figures <- NULL

# CHICAGO ----

city = "Chicago"

set.seed(seed)

## delete unneeded objects
rm(list = setdiff(ls(), c("figures", "seed", "seeds", "sims", "theme_map", "trans", "starttime")))

load("source/chi_rollcalls.rda")

maj <- colMeans(rcmat, na.rm=TRUE)
maj <- ifelse(maj < .5, 1-maj, maj)

unan <- tibble(
  m = maj, 
  term = trm, 
)
unan <- unan %>% 
  mutate(m = ifelse(m < .5, 1-m, m)) %>% 
  filter(m < .95)

uterms <- c("1979-83 Byrne","1983-87 Washington","1987 Washington","1987-89 Sawyer", "1989-91 Daley", "1991-95 Daley", "1995-99 Daley","1999-2003 Daley","2003-07 Daley","2007-11 Daley","2011-15 Emanuel","2015-19 Emanuel")
names(uterms) <- c(1,2,3,4,5,6,7,8,9,10,11,12)

## calculate Number of votes, cells and missed votes
n_votes <- sum(!is.na(c(rcmat)))
s <- split(as.data.frame(t(rcmat)), trm)
struct_miss <- sapply(s, \(d)sum(apply(d, 2, \(x)if(all(is.na(x))){length(x)}else{0})))
struct_miss <- sum(struct_miss)
idio_miss <- sum(is.na(c(rcmat))) - struct_miss
l <- lapply(s, \(d)apply(d, 2, \(x)c(n_miss = sum(is.na(x)), n_tot = length(x))))
allmiss <- sapply(l, \(x)x[1,])
all_v <- sapply(l, \(x)x[2,])
idio_miss_v <- sapply(1:ncol(allmiss), \(i){
  ifelse(allmiss[,i] == all_v[,i], 0, allmiss[,i])
})
idio_miss_pct <- idio_miss_v/all_v
idio_miss_pct <- apply(idio_miss_pct, 1, \(x)mean(x[which(x > 0)]))
sprintf("Number of Votes:  %.0f", n_votes)
sprintf("Number of Cells: %.0f", prod(dim(rcmat)))
sprintf("Number of Observed Votes: %0.f", sum(!is.na(c(rcmat))))
sprintf("Percentage of Missed Votes: %.2f%%", mean(na.omit(idio_miss_pct))*100)

## Number and Proportion in Modal Category
tabs <- apply(rcmat, 2, function(x)table(factor(x, levels=c(0,1))))
mcvotes <- sum(apply(tabs, 2, max))
sprintf("Number in the Modal Category %.0f", mcvotes)
sprintf("Proportion in the Modal Category: %.2f", mcvotes/n_votes)

## Calculate Agreement Score
a <- function(x,y){
  tmp <- rcm[c(x,y), ]
  tmp <- t(tmp)
  tmp <- na.omit(tmp)
  sum(tmp[,1] == tmp[,2])/nrow(tmp)
}
rcm <- rcmat

o <- outer(1:nrow(rcm), 1:nrow(rcm), Vectorize(a))
a <- o[upper.tri(o)]
a <- ifelse(a < .5, 1-a, a)
chi_agree <- a

sprintf("Median agreement score: %.0f%%", median(na.omit(chi_agree))*100)

## Estimate Models
ld <- data.frame(id = rownames(rcmat), name = rownames(rcmat))
orthogonal <- TRUE
out <- legR(
  rcmat, 
  terms=trm, 
  legis_data=ld, 
  nperterm=10, 
  minprop=.05, 
  nRounds = 1,
  othermax=NULL, 
  k=2, 
  seed=seed, 
  ndim=2, 
  method="glrm", 
  max_mem_size="8g", 
  est_model = FALSE
)

## set polarity anchors on priors
priors <- out$priors
priors[[1]]$x.mu0 <- matrix(0, ncol=1, nrow=nrow(priors[[1]]$x.mu0 ))
priors[[2]]$x.mu0 <- matrix(0, ncol=1, nrow=nrow(priors[[2]]$x.mu0 ))
priors[[1]]$x.sigma0 <- matrix(1, ncol=1, nrow=nrow(priors[[1]]$x.sigma0 ))
priors[[2]]$x.sigma0 <- matrix(1, ncol=1, nrow=nrow(priors[[2]]$x.sigma0 ))
priors[[1]]$x.mu0[which(out$dats[[1]]$id$name == "Madrzyk_JS"),1] <- 1
priors[[1]]$x.mu0[which(out$dats[[1]]$id$name == "Streeter_A"),1] <- -1
priors[[2]]$x.mu0[which(out$dats[[2]]$id$name == "Hagopian_GJ"),1] <- 1
priors[[2]]$x.mu0[which(out$dats[[2]]$id$name == "Oberman_MJ"),1] <- -1
priors[[1]]$x.sigma0[which(out$dats[[1]]$id$name %in% c("Streeter_A", "Madrzyk_JS"))] <- .1
priors[[2]]$x.sigma0[which(out$dats[[2]]$id$name %in% c("Oberman_MJ", "Hagopian_GJ"))] <- .1

out <- legR(
  rcmat, 
  terms=trm, 
  legis_data=ld, 
  priors = priors, 
  nperterm=10, 
  minprop=.05, 
  nRounds = 1,
  othermax=NULL, 
  k=2, 
  seed=seed, 
  ndim=2, 
  method="glrm", 
  max_mem_size="8g", 
  est_model = TRUE
)

chi_legr_out <- out
save(chi_legr_out, file="temp/chi_legr_out.rda")

## Collect results, ensure consistent polarity across time and orthogonalize
x <- gather_data(out)
x <- x %>% dplyr::filter(!((Dim1 == 0 | is.na(Dim1)) & (Dim2 == 0 | is.na(Dim2))))
x <- x %>% dplyr::arrange(session, name)
{if(orthogonal){
  flipx <- flip(x, id="name", vars=c("Dim1", "Dim2"), time="session")
  x <- left_join(x, flipx %>% mutate(session = as.numeric(session)))
  ## use the - in front of Dim1_gs*flip1 to make it positively related to mqs
  x <- x %>% mutate(Dim1 = -Dim1*flip1,
                    Dim2 = Dim2*flip2, 
                    Dim1 = -Dim1) %>% 
    select(-contains("flip")) 
  xs <- x %>% group_split(session) %>% map(function(x){
    o <- orthogonalize(x)
    o <- o %>% select(-Dim1, -Dim2) %>% rename(Dim1=Dim1_gs, Dim2=Dim2_gs)
    o})
  x <- bind_rows(xs)
}else{
  flipx <- flip(x, id="name", vars=c("Dim1", "Dim2"), time="session")
  x <- left_join(x, flipx %>% mutate(session = as.numeric(session)))
  ## use the - in front of Dim1_gs*flip1 to make it positively related to mqs
  x <- x %>% mutate(Dim1 = -Dim1*flip1,
                    Dim2 = Dim2*flip2, 
                    Dim1 = -Dim1, 
                    Dim2 = ifelse(session == 2, -Dim2, Dim2)) %>% 
    select(-contains("flip")) %>% 
    mutate(Dim2 = ifelse(session %in% c(1,3), -Dim2, Dim2)) 
}
}

# used in Appendix E
chi_baseline <- x
save(chi_baseline, file="temp/chi_baseline.rda")

## Figure 1: Chicago PRE bar plot ----
res <- NULL
for(i in 1:ncol(rcmat)){
  tmp.trm <- trm[i]
  dat <- as_tibble(rcmat[,i, drop=FALSE], rownames="name")
  names(dat)[2] <- "vote"
  lat <- x %>% filter(session == tmp.trm)
  dat <- inner_join(dat, lat, by="name")
  if(sum(dat$vote == 1, na.rm=TRUE) > 0 & sum(dat$vote == 0, na.rm=TRUE) > 0){
    if(tmp.trm < 13 ){    
      m1 <- glm(vote ~ Dim1, data=dat, family=binomial)
      m12 <- glm(vote ~ Dim1 + Dim2, data=dat, family=binomial)
      ce1 <- sum(m1$y != round(m1$fitted))
      ce12 <- sum(m12$y != round(m12$fitted))
      m2 <- glm(vote ~ Dim2, data=dat, family=binomial)
      ce2 <- sum(m2$y != round(m2$fitted))
      minvotes <- min(table(na.omit(dat$vote)))
      nvotes <- sum(!is.na(dat$vote))
      res <- rbind(res, data.frame(session=tmp.trm, 
                                   bill = i,
                                   minvote = minvotes, 
                                   ce1 = ce1, 
                                   ce2 = ce2, 
                                   ce12 = ce12, 
                                   n = nvotes))
    }else{
      m2 <- glm(vote ~ Dim2, data=dat, family=binomial)
      ce2 <- sum(m2$y != round(m2$fitted))
      minvotes <- min(table(na.omit(dat$vote)))
      nvotes <- sum(!is.na(dat$vote))
      res <- rbind(res, data.frame(session=tmp.trm, 
                                   bill = i, 
                                   minvote = minvotes, 
                                   ce1 = NA, 
                                   ce2 = ce2, 
                                   ce12 = NA, 
                                   n=nvotes))
    }
  }
}

legr_apre_by_term  <- res %>% group_by(session) %>% 
  summarise(apre1 = sum(minvote - ce1)/sum(minvote), 
            apre2 = sum(minvote - ce2)/sum(minvote), 
            apre12 = sum(minvote - ce12)/sum(minvote)) %>% 
  mutate(apre_diff = apre12-apre1)

legr_apre <- res %>% 
  summarise(apre1 = sum(minvote - ce1, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre2 = sum(minvote - ce2, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre12 = sum(minvote - ce12, na.rm=TRUE)/sum(minvote, na.rm=TRUE)) %>% 
  mutate(apre_diff = apre12-apre1)
chi_apre <- bind_rows(legr_apre %>% mutate(session = 0) %>% select(session, everything()), legr_apre_by_term) %>% 
  mutate(city = "Chicago", 
         session = factor(session, 
                          levels=0:12, 
                          labels= c("Overall", uterms))) %>% # changed terms to uterms
  mutate(apre12 = apre12-apre1) %>% 
  pivot_longer(c("apre1","apre12"), names_to="dimension", values_to="apre") 

sprintf("Percent Correctly Predicted, Dim 1: %.0f%%",( (sum(res$n, na.rm=TRUE) - sum(res$ce1, na.rm=TRUE))/sum(res$n, na.rm=TRUE))*100)
sprintf("Percent Correctly Predicted, Dim 2: %.0f%%",( (sum(res$n, na.rm=TRUE) - sum(res$ce2, na.rm=TRUE))/sum(res$n, na.rm=TRUE))*100)
sprintf("Percent Correctly Predicted, Dim1 + 2: %.0f%%",( (sum(res$n, na.rm=TRUE) - sum(res$ce12, na.rm=TRUE))/sum(res$n, na.rm=TRUE))*100)


save(chi_apre, file="temp/chi_apre.rda")

cat("Percentage of PRE\n")
legr_apre_by_term %>% 
  mutate(pct1 = round(apre1/apre12*100), 
         pct2 = round((apre12-apre1)/apre12*100), 
         session = factor(session, labels=uterms)) %>% 
  select(session, pct1, pct2) %>% 
  as.data.frame()

figures[['fig1chi_apre']] <- chi_apre %>% 
  mutate(dimension = factor(dimension, 
                            levels=c("apre12", "apre1"), 
                            labels=c("Dimension 1+2", "Dimension 1"))) %>% 
  ggplot(aes(y=apre, fill=dimension)) + 
  geom_bar(aes(x=rep(c(-.5, 1:12), each=2)), stat="identity")  + 
  theme_classic() + 
  labs(x="", y="APRE", fill="") + 
  theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.25), 
        text = element_text(size = 8),
        legend.position="bottom") + 
  scale_fill_manual(values = c("grey70", "grey30")) +
  scale_y_continuous(labels=label_percent(accuracy=1)) + 
  scale_x_continuous(breaks = c(-.5, 1:12), labels=levels(chi_apre$session)) + 
  theme(legend.position = c(.8, .9)) 



## Figure 2: Chicago Movements ----
set.seed(seed)
boot1 <- boot_emIRT(out$mods[[1]], 
                    .data=out$dats[[1]]$dat, 
                    .starts= out$starts[[1]], 
                    .priors= out$priors[[1]], 
                    Ntrials = 100, 
                    .control=list(threads = 4, verbose = TRUE, thresh = 1e-4, maxit=200, checkfreq=1))
boot2 <- boot_emIRT(out$mods[[2]], 
                    .data=out$dats[[2]]$dat, 
                    .starts= out$starts[[2]], 
                    .priors= out$priors[[2]], 
                    Ntrials = 100, 
                    .control=list(threads = 4, verbose = TRUE, thresh = 1e-4, maxit=200, checkfreq=1))

x1w <- x %>% 
  select(name, session, Dim1) %>%
  mutate(session = paste0("term_", session)) %>% 
  pivot_wider(names_from = "session", values_from = "Dim1", values_fill=NA)

x2w <- x %>% 
  select(name, session, Dim2) %>%
  mutate(session = paste0("term_", session)) %>% 
  pivot_wider(names_from = "session", values_from = "Dim2", values_fill=NA)

b1s <- boot1$bse$x
b2s <- boot2$bse$x

x1w <- x1w[match(ld$name, x1w$name), ]

x1l <- x1w %>% 
  pivot_longer(-name, names_pattern="term_(.*)", names_to="term", values_to = "vals")
x1l <- x1l %>% mutate(sd = c(t(b1s)))
x1l <- x1l %>% mutate(term = as.numeric(term))
x1l <- na.omit(x1l)
x1l <- x1l %>% 
  group_by(name) %>% 
  filter(n() >= 2)

draws <- replicate(1500, rnorm(nrow(x1l), x1l$vals, x1l$sd))
colnames(draws) <- paste0("drw", 1:1500)
x1l <- cbind(x1l, draws)
x1l2 <- x1l %>% 
  pivot_longer(starts_with('drw'), names_to = "draw", values_to = "sims")
x1l2 <- x1l2 %>% 
  group_by(name, draw) %>% 
  transmute(term = term, 
            sim_diff = sims - lag(sims), 
            val_diff = vals - lag(vals))

d1move <- x1l2 %>% group_by(name, term, val_diff) %>% 
  summarise(p = mean(sim_diff > 0)) %>% 
  mutate(p = ifelse(p < .5, 1-p, p)) %>% 
  na.omit()


x2w <- x2w[match(out$dats[[2]]$id$name, x2w$name), ]

x2l <- x2w %>% 
  pivot_longer(-name, names_pattern="term_(.*)", names_to="term", values_to = "vals")
x2l <- x2l %>% mutate(sd = c(t(b2s)))
x2l <- x2l %>% mutate(term = as.numeric(term))
x2l <- na.omit(x2l)
x2l <- x2l %>% 
  group_by(name) %>% 
  filter(n() >= 2)

draws <- replicate(1500, rnorm(nrow(x2l), x2l$vals, x2l$sd))
colnames(draws) <- paste0("drw", 1:1500)
x2l <- cbind(x2l, draws)
x2l2 <- x2l %>% 
  pivot_longer(starts_with('drw'), names_to = "draw", values_to = "sims")
x2l2 <- x2l2 %>% 
  group_by(name, draw) %>% 
  transmute(term = term, 
            sim_diff = sims - lag(sims), 
            val_diff = vals - lag(vals))

d2move <- x2l2 %>% group_by(name, term, val_diff) %>% 
  summarise(p = mean(sim_diff > 0)) %>% 
  mutate(p = ifelse(p < .5, 1-p, p)) %>% 
  na.omit()

df2 <- bind_rows(
  d1move %>% mutate(f = "Dimension 1"), 
  d2move %>% mutate(f = "Dimension 2"))



df_ag <- df2 %>% 
  mutate(cutd = cut(abs(val_diff), breaks=seq(0, 3.5, by=.25)), 
         sig = ifelse(p > .9, "Yes", "No")) %>% 
  group_by(f, cutd, sig) %>% 
  tally %>% 
  ungroup %>% 
  group_by(f) %>% 
  mutate(pct = n/sum(n), 
         x=as.numeric(cutd)*.25-.125)

figures[['fig2chi_movement']] <- ggplot(df_ag, aes(x=x, y=pct, fill=sig)) + 
  geom_bar(width=.25, alpha=.7, stat="identity", position="identity") + 
  facet_wrap(~f) + 
  theme_bw() +
  theme(legend.position = c(.95, .95 ),
        legend.justification = c("right", "top"),
        legend.key.size = unit(.125, "in"),
        axis.ticks = element_line(colour = 'black', size = 0.2),
        text = element_text(size = 8),
        strip.background = element_blank(),
        panel.grid=element_blank()) + 
  scale_fill_manual(values = c("black", "grey80")) +
  scale_y_continuous(labels = scales::label_percent()) + 
  labs(x="Difference (t - [t-1])", 
       y="Percentage of Changes", 
       fill="Credible\nChange")


## Figures 3a + 3b: Chicago Correlation Tables ----

data <- read.dta13("source/chi_ward_councilors.dta") %>%
  select(starts_with("v_") | starts_with("q_") | starts_with("s_") | termlabel | session | alderman_id ) %>% 
  rename(name = alderman_id) %>% 
  left_join(x %>% 
              mutate(#Dim2 = ifelse(session >= 6, -Dim2, Dim2), 
                Dim1 = ifelse(session < 12, Dim1, -Dim1)) %>% 
              rename(legR_D1 = Dim1, legR_D2 = Dim2)) %>% 
  select(-starts_with("Dim"), -session, -name)

race <- read.dta13("source/chi_ward_councilors.dta") %>%
  select(starts_with("race") | termlabel | session | alderman_id) %>% 
  rename(name = alderman_id) %>% 
  left_join(x %>% 
              # mutate(#Dim2 = ifelse(session >= 6, -Dim2, Dim2), 
              #        Dim1 = ifelse(session < 12, Dim1, -Dim1)) %>% 
              rename(legR_D1 = Dim1, legR_D2 = Dim2)) %>% 
  select(-starts_with("Dim"), -name)

varname_lookup <- rio::import("source/varname_lookup.xlsx") 
# %>% 
#   filter(!variable == "v_lfoprof")
varname_lookup <- bind_rows(varname_lookup, tibble(variable = "v_lfosci", varname = "% Science/Tech Jobs", vartype="status"))

byterm <- split(data, f = data$termlabel) 

byterm[["Pooled"]] <- data  # add whole dataset as pooled" item  

terms <- c("Pooled","1979-1983","1983-1987","1987-1987","1987-1989","1989-1991","1991-1995","1995-1999","1999-2003","2003-2007","2007-2011","2011-2015","2015-2019")

cormatrix <- NULL
cormatrix_d1 <- NULL
cormatrix_d2 <- NULL

for(t in terms){
  
  byterm[[t]] <- byterm[[t]] %>% select(-termlabel) # drop termlabel string variable from byterm to enable correlation
  
  cormatrix[[t]] <- correlate(byterm[[t]], quiet = TRUE) # correlate entire table
  
  cormatrix_d1[[t]] <- cormatrix[[t]] %>% # isolate correlations with Dim1_gs
    focus(legR_D1) %>%
    rename(!!paste0(t) := legR_D1)        # rename coefficient variable to termlabel
  
  cormatrix_d2[[t]] <- cormatrix[[t]] %>% # isolate correlations with Dim2_gs
    focus(legR_D2) %>%
    rename(!!paste0(t) := legR_D2)        # rename coefficient variable to termlabel
  
}

# drop coefficients >= -.2 | <= .2
# drop if more than 3 NAs in row, Dim variables

cortable_d1 <- bind_cols(cormatrix_d1) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)

cortable_d2 <- bind_cols(cormatrix_d2) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)

### Figure 3a: Chicago, D1 Correlation ----

tmp1 <- cortable_d1 %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(across(3:15, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:15)))) %>% 
  ungroup %>% 
  filter(count_na <= 3) %>% 
  mutate(fac = factor(1:n(), labels=varname)) %>% 
  pivot_longer(3:15, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) >= .6, "bold", "plain")) %>% 
  na.omit() 
vt <- tmp1 %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))

colvals <- c("gray40", "gray70", "white", "gray70", "gray40")

figures[['fig3achi_cor_d1']] <- ggplot(tmp1, aes(x=period, y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=max(as.numeric(tmp1$fac[which(tmp1$period == "Pooled" & tmp1$vals < 0)]))+.5) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1), 
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=colvals) + 
  scale_y_continuous(breaks=seq_along(levels(tmp1$fac)), labels=levels(tmp1$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp1$fac)), labels=vt$vartype)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", ylim=c(.5, max(as.numeric(tmp1$fac))+.5), expand=FALSE) 

### Figure 3b: Chicago, D2 Correlation ----

tmp2 <- cortable_d2 %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(across(3:15, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:15)))) %>% 
  ungroup %>% 
  filter(count_na <= 5) %>% 
  mutate(fac = factor(1:n(), labels=varname)) %>% 
  pivot_longer(3:15, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) >= .6, "bold", "plain")) %>% 
  na.omit()  %>% 
  filter(!fac == "% professional occupations")


vt <- tmp2 %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))
bgtab <- table(tmp2$bg)
if(any(bgtab == 0))colvals <- colvals[-which(bgtab == 0)]
tmp2$fac <- droplevels(tmp2$fac)

figures[['fig3bchi_cor_d2']] <- ggplot(tmp2, aes(x=as.numeric(period), y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=max(as.numeric(tmp2$fac[which(tmp2$period == "Pooled" & tmp2$vals < 0)]))+.5) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1), 
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=colvals) + 
  scale_y_continuous(breaks=seq_along(levels(tmp2$fac)), labels=levels(tmp2$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp2$fac)), labels=vt$vartype)) + 
  scale_x_continuous(breaks = 1:13, labels=levels(tmp2$period)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", xlim=c(.5,13.5), ylim=c(.5, max(as.numeric(tmp2$fac))+.5), expand=FALSE) 

## Figure 4: Chicago Race Boxplots ----
sessions <- c("1979-83\nByrne","1983-87\nWashington","1987\nWashington","1987-89\nSawyer", "1989-91\nDaley", "1991-95\nDaley", "1995-99\nDaley","1999-2003\nDaley","2003-07\nDaley","2007-11\nDaley","2011-15\nEmanuel","2015-19\nEmanuel")
names(sessions) <- c(1,2,3,4,5,6,7,8,9,10,11,12)

d1 <- ggplot(subset(race, race == "White" | race == "Black"), aes(y = race, x = legR_D1)) + 
  geom_vline(xintercept=0, linetype="dotted") +
  geom_boxplot(aes(fill = race), lwd = .25, outlier.shape = NA) +
  facet_grid(session ~ ., labeller = labeller(session=sessions)) +
  theme_bw() + 
  theme(panel.grid = element_blank(), 
        strip.background = element_blank(), 
        strip.text.y = element_blank(), 
        panel.grid.minor=element_blank(),
        text = element_text(size = 8),
        legend.position="none") +
  coord_cartesian(xlim=c(-3,3)) +
  scale_fill_manual(values = c("grey", "white")) +
  xlab("Dimension 1") + ylab("Race of Alderman")

d2 <- ggplot(subset(race, race == "White" | race == "Black"), aes(y = race, x = legR_D2)) + 
  geom_vline(xintercept=0, linetype="dotted") +
  geom_boxplot(aes(fill = race), lwd = .25, outlier.shape = NA) +
  facet_grid(session ~ ., labeller = labeller(session=sessions)) +
  theme_bw() + 
  theme(panel.grid = element_blank(), 
        axis.title.y = element_blank(), 
        strip.text.y = element_text(angle = 0), 
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank(),
        panel.grid.minor=element_blank(),
        text = element_text(size = 8),
        legend.position="none") +
  coord_cartesian(xlim=c(-3,3)) +
  scale_fill_manual(values = c("grey", "white")) +
  xlab("Dimension 2") + ylab("Race of Alderman")

figures[['fig4chi_race']] <- plot_grid(d1, d2,  nrow = 1, rel_widths = c(.48,.52))

# Appendix ----
## Figure A1: Chicago Number of Bills ----

tab1 <- table(factor(out$dats[[1]]$dat$bill.session + 1, levels=1:12))
tab2 <- table(factor(out$dats[[2]]$dat$bill.session + 1, levels=1:12))
ndf <- data.frame(
  n = c(tab1, tab2), 
  session = rep(1:12, 2), 
  Dimension = rep(1:2, each=12)
)

figures[['figA1chi_votes']] <- ggplot(ndf, aes(x=as.factor(session), y=as.factor(Dimension), fill=log(n))) + 
  geom_tile(show.legend = FALSE) + 
  geom_text(aes(label = n), color="white") + 
  #  scale_fill_viridis_c() + 
  theme_classic() + 
  labs(x="Session", y="Dimension")+ 
  coord_cartesian(expand=FALSE) +
  ggtitle("Chicago")

## Figure A2: Chicago Betas ----
betas <- tibble(
  beta = c(out$mods[[1]]$means$beta, out$mods[[2]]$means$beta), 
  session = c(out$dats[[1]]$dat$bill.session, out$dats[[2]]$dat$bill.session), 
  dimension = rep(1:2, c(length(out$dats[[1]]$dat$bill.session), 
                         length(out$dats[[2]]$dat$bill.session))) 
)
figures[['figA2chi_betas']] <- ggplot(betas, aes(x=abs(beta), y=after_stat(density)*after_stat(width))) + 
  geom_histogram(bins=10) + 
  facet_grid(dimension ~ I(session+1)) + 
  theme_bw() + 
  theme(panel.grid = element_blank()) + 
  scale_x_continuous(breaks = c(0, 1, 2)) +
  scale_y_continuous(label = scales::percent) + 
  labs(x="Absolute Value of Beta", y="Percentage")  +
  ggtitle("Chicago")

## Figure C1: Chicago Voting in Majority ----

figures[['figC1chi_unanimity']] <- unan %>% 
  mutate(term = factor(term, labels=uterms)) %>% 
  ggplot(aes(x=m, group=term)) + 
  geom_histogram(aes(y = after_stat(density)*.05), 
                 binwidth=.05, col="white", boundary=.5) + 
  facet_wrap(~term, ncol = 4) + 
  theme_bw() + 
  theme(panel.grid=element_blank()) + 
  labs(x="Percentage Voting in the Majority", 
       y="Percentage of Votes in Term") + 
  scale_y_continuous(labels=label_percent(accuracy=1)) + 
  scale_x_continuous(labels=percent) +
  ggtitle("Chicago")

## Figure D1: Chicago Moran's I ----

chi_wardcensus <- read.dta13("source/chi_ward_data.dta")

### ingest ward shapefiles

chi_wards <- NULL

for (i in c("1970", "1981", "1986", "1992", "1998", "2002", "2012")) {
  
  chi_wards[[i]] <- st_read(paste0("source/ward_shp/chi_ward", i, ".shp")) %>%
    mutate(wyear = as.numeric(i)) %>%
    rename(ward = 1) %>%
    left_join(chi_wardcensus, by = c("ward", "wyear")) 
  
}

items <- c("w1970_c1980", "w1981_c1980", "w1986_c1990", "w1992_c1990", "w1998_c2000", "w2002_c2000", "w2012_c2010")

names(chi_wards) <- items

chi_wards$w2002_c2000 <- chi_wards$w2002_c2000 %>% filter(cyear == 2000) # remove 2010 data

varname_lookup <- rio::import("source/varname_lookup.xlsx")

# set parameters

moran.summary <- NULL


for (item in items) {
  
  shp = paste0("chi_wards$", item)
  prefix = c("v_", "q_", "s_")
  weighttype = "S"
  
  # convert to sp object
  shp.sp <- eval(parse(text = shp)) %>% as_Spatial()
  
  # retrieve desired column names to use
  vars <- shp.sp@data %>% select(starts_with(prefix)) %>% colnames()
  
  # make weights table
  nb <- poly2nb(shp.sp, queen = FALSE)    # queen = FALSE means an edge is shared
  w <- nb2listw(nb, style = weighttype, zero.policy = TRUE) 
  
  # calculate Moran's I for all variables using Monte Carlo method
  moran <- NULL
  for(v in vars) {
    moran[[v]] <- moran.mc(eval(parse(text = paste0("shp.sp@data$", v))), 
                           listw = w, nsim = 499, zero.policy = TRUE, na.action = na.exclude)
  }
  
  # create moran summary table by variable
  moran.summary[[item]] <- data.frame(matrix(unlist(moran), nrow = length(moran), byrow = TRUE), row.names = vars) %>%
    select(c(X1, X3)) %>%
    rename("moran" = "X1") %>%
    rename("moranp" = "X3") %>%
    mutate(variable = rownames(.)) %>%
    mutate("rank" = dense_rank(desc(moran))) %>%
    dplyr::arrange(rank) %>%
    distinct(moran, variable, rank, .keep_all = TRUE) %>%
    filter(moran != "NaN") %>%
    mutate(moran = round(as.numeric(moran), 3)) %>%
    rename(!!paste0(item) := moran) %>%
    select(-rank, -moranp)
}

moran.combine <- moran.summary %>%
  reduce(left_join, by = "variable") %>%
  left_join(varname_lookup, by = "variable") %>%
  select(-vartype) %>%
  filter(!is.na(varname)) %>%
  rowwise() %>%
  mutate(sd = round(sd(c_across(starts_with("w"))),3)) %>%
  relocate(varname, variable, sd) %>%
  distinct()

# sparklines of moran coefficients

cols <- c("varname", "variable", "sd", "w1970\nC1980", "w1981\nC1980", "w1986\nC1990", "w1992\nC1990", "w1998\nC2000", "w2002\nC2000", "w2012\nC2010")

vars <- moran.combine$variable

sparklines <- NULL

for (i in vars) {
  
  varname = varname_lookup %>%
    filter(variable == i) %>%
    select(varname) %>%
    as_vector()
  
  table <- moran.combine 
  names(table) <- cols
  
  table <- table %>% 
    pivot_longer(cols = starts_with("w"), names_to = "WardScheme", names_prefix = "w", values_to = "I", values_drop_na = TRUE) %>%
    filter(variable == i) %>%
    #mutate(WardScheme = as.numeric(WardScheme)) %>%
    select(varname, WardScheme, I) %>%
    mutate(max = ifelse(I == max(I), I, NA)) %>%
    mutate(min = ifelse(I == min(I), I, NA))
  
  sparklines[[i]] <- ggplot() +
    geom_hline(yintercept = mean(table$I), colour = "grey", linetype = "dashed", size = .5) +
    geom_line(data = table,  aes(x = WardScheme, y = I, group = 1)) +
    geom_point(data = table, aes(x = WardScheme, y = I), size = 1, colour = alpha("black", 1)) +
    geom_point(data = table, aes(x = WardScheme, y = max), size = 3, colour = alpha("red", 1)) +
    geom_point(data = table, aes(x = WardScheme, y = min), size = 3, colour = alpha("blue", 1)) +
    geom_text(data = table, aes(x = WardScheme, y = max, label = max), size = 3, colour = alpha("red", 1), check_overlap = TRUE, vjust = -1) +
    geom_text(data = table, aes(x = WardScheme, y = min, label = min), size = 3, colour = alpha("blue", 1), check_overlap = TRUE, vjust = 2) +
    scale_y_continuous(expand = expansion(add = .15)) +
    labs(title = varname, ylab = "Moran's I") +
    theme_classic() +
    theme(axis.title.y = element_text(angle = 90),
          axis.line = element_blank(),
          axis.ticks.y = element_blank(),
          axis.text.y = element_blank())
  
}

# assemble

figures[['figD1chi_moran']] <- ggarrange(sparklines$v_vm_visiblcknhsp + rremove("ylab") + rremove("xlab") + rremove("x.text") + rremove("x.ticks"), 
                                         sparklines$v_vm_nvisnhsp + rremove("ylab") + rremove("xlab") + rremove("x.text") + rremove("x.ticks"), 
                                         sparklines$v_povpers + rremove("ylab") + rremove("xlab") + rremove("x.text") + rremove("x.ticks"),
                                         sparklines$v_ethcaripuri + rremove("ylab") + rremove("xlab") + rremove("x.text") + rremove("x.ticks"), 
                                         sparklines$q_inhtotl_med + rremove("ylab") + rremove("xlab") + rremove("x.text") + rremove("x.ticks"), 
                                         sparklines$v_jwmauto + rremove("ylab") + rremove("xlab") + rremove("x.text") + rremove("x.ticks"), 
                                         sparklines$v_ethcaricuba + rremove("ylab"), 
                                         sparklines$v_ethltammexi + rremove("ylab"),
                                         ncol = 2, nrow = 4)

## Figure E1: Correlations Using Different RNG Seeds (Chicago) ----

terms <- c("1979-83 Byrne","1983-87 Washington","1987 Washington","1987-89 Sawyer", "1989-91 Daley", "1991-95 Daley", "1995-99 Daley","1999-2003 Daley","2003-07 Daley","2007-11 Daley","2011-15 Emanuel","2015-19 Emanuel")
names(terms) <- c(1,2,3,4,5,6,7,8,9,10,11,12)
termlabels <- as_tibble_col(terms, column_name = "termlabel") %>% 
  mutate(session = row_number())

# Load data

load("source/chi_rollcalls.rda") # locads rcmat

data <- read.dta13("source/chi_ward_councilors.dta") %>%
  select(starts_with("v_") | starts_with("q_") | starts_with("s_") | termlabel | session | alderman_id ) %>% 
  rename(name = alderman_id) 

# Initialize list
r1 <- r2 <- vector(mode="list", length=sims)

# Simulation loop
i <- k <- 1
while(k <= sims){

  print(paste0("Chicago Iteration: ", i))
  print(paste0("Seed: ", seeds[i]))
  print(Sys.time())

  # h2o::h2o.removeAll()
  # h2o::h2o.shutdown(prompt = FALSE)
  # h2o.init()

  ## run legR
  ld <- data.frame(id = rownames(rcmat), name = rownames(rcmat))
  orthogonal <- TRUE
  out <- try(legR(
    rcmat,
    terms=trm,
    legis_data=ld,
    nperterm=10,
    minprop=.05,
    nRounds = 1,
    othermax=NULL,
    k=2,
    seed=seeds[i],
    ndim=2,
    method="glrm",
    max_mem_size="8g",
    est_model = FALSE
  ))
  if(inherits(out, "try-error")){
    i <- i+1
  }else{
    ## set polarity anchors on priors
    priors <- out$priors
    priors[[1]]$x.mu0 <- matrix(0, ncol=1, nrow=nrow(priors[[1]]$x.mu0 ))
    priors[[2]]$x.mu0 <- matrix(0, ncol=1, nrow=nrow(priors[[2]]$x.mu0 ))
    priors[[1]]$x.sigma0 <- matrix(1, ncol=1, nrow=nrow(priors[[1]]$x.sigma0 ))
    priors[[2]]$x.sigma0 <- matrix(1, ncol=1, nrow=nrow(priors[[2]]$x.sigma0 ))
    priors[[1]]$x.mu0[which(out$dats[[1]]$id$name == "Madrzyk_JS"),1] <- 1
    priors[[1]]$x.mu0[which(out$dats[[1]]$id$name == "Streeter_A"),1] <- -1
    priors[[2]]$x.mu0[which(out$dats[[2]]$id$name == "Hagopian_GJ"),1] <- 1
    priors[[2]]$x.mu0[which(out$dats[[2]]$id$name == "Oberman_MJ"),1] <- -1
    priors[[1]]$x.sigma0[which(out$dats[[1]]$id$name %in% c("Streeter_A", "Madrzyk_JS"))] <- .1
    priors[[2]]$x.sigma0[which(out$dats[[2]]$id$name %in% c("Oberman_MJ", "Hagopian_GJ"))] <- .1

    out <- try(legR(
      rcmat,
      terms=trm,
      legis_data=ld,
      priors = priors,
      nperterm=10,
      minprop=.05,
      nRounds = 1,
      othermax=NULL,
      k=2,
      seed=seeds[i],
      ndim=2,
      method="glrm",
      max_mem_size="8g",
      est_model = TRUE
    ))
    if(inherits(out, "try-error")){
      i <- i+1
    }else{
      ## Collect results, ensure consistent polarity across time and orthogonalize
      x <- try(gather_data(out))
      if(inherits(x, "try-error")){
        i <- i+1
      }else{
      x <- x %>% dplyr::filter(!((Dim1 == 0 | is.na(Dim1)) & (Dim2 == 0 | is.na(Dim2))))
      x <- x %>% arrange(session, name)
      flipx <- flip(x, id="name", vars=c("Dim1", "Dim2"), time="session")
      x <- left_join(x, flipx %>% mutate(session = as.numeric(session)))
      ## use the - in front of Dim1_gs*flip1 to make it positively related to mqs
      x <- x %>% mutate(Dim1 = -Dim1*flip1,
                        Dim2 = Dim2*flip2,
                        Dim1 = -Dim1) %>%
        select(-contains("flip"))
      #   %>%
      #   mutate(Dim2 = ifelse(session %in% 5:7, -Dim2, Dim2))
      xs <- x %>% group_split(session) %>% map(function(x){
        o <- orthogonalize(x)
        o <- o %>% select(-Dim1, -Dim2) %>% rename(Dim1=Dim1_gs, Dim2=Dim2_gs)
        o})
      x <- bind_rows(xs)

      # x1 <- x %>% select(-Dim2) %>% pivot_wider(names_from="session", values_from="Dim1", names_prefix="s")
      # cor(x1[,-1], use="pair")
      # x2 <- x %>% select(-Dim1) %>% pivot_wider(names_from="session", values_from="Dim2", names_prefix="s")
      # cor(x2[,-1], use="pair")


      # Assemble correlation matrices

      data <- read.dta13("source/chi_ward_councilors.dta") %>%
        select(starts_with("v_") | starts_with("q_") | starts_with("s_") | termlabel | session | alderman_id ) %>%
        rename(name = alderman_id) %>%
        left_join(x, by=join_by(session == session, name==name) )


      byterm <- split(data, f = data$termlabel)

      byterm[["Pooled"]] <- data  # add whole dataset as pooled" item

      terms <- c("Pooled","1979-1983","1983-1987","1987-1987","1987-1989","1989-1991","1991-1995","1995-1999","1999-2003","2003-2007","2007-2011","2011-2015","2015-2019")

      cormatrix <- NULL
      cormatrix_d1 <- NULL
      cormatrix_d2 <- NULL

      for(t in terms){

        byterm[[t]] <- byterm[[t]] %>% select(-termlabel) # drop termlabel string variable from byterm to enable correlation

        cormatrix[[t]] <- correlate(byterm[[t]], quiet = TRUE) # correlate entire table

        cormatrix_d1[[t]] <- cormatrix[[t]] %>% # isolate correlations with Dim1_gs
          focus(Dim1) %>%
          rename(!!paste0(t) := Dim1) %>%       # rename coefficient variable to termlabel
          filter(term != "Dim2")

        cormatrix_d2[[t]] <- cormatrix[[t]] %>% # isolate correlations with Dim2_gs
          focus(Dim2) %>%
          rename(!!paste0(t) := Dim2) %>% # rename coefficient variable to termlabel
          filter(term != "Dim1")

      }

      # drop coefficients >= -.2 | <= .2
      # drop if more than 3 NAs in row, Dim variables

      cortable_d1 <- bind_cols(cormatrix_d1) %>%
        rename(variable = term...1) %>%           # rename first variable name column
        select(!starts_with("term")) %>%          # drop duplicate variable name columns
        relocate(Pooled, .after = variable) %>%
        arrange(-Pooled)

      r <- sapply(3:14, function(i)mean(sign(cortable_d1[,3]) == sign(cortable_d1[,i]), na.rm=TRUE))

      flips <- (3:14)[which(r < .5)]
      if(length(flips) > 0){
        cortable_d1 <- cortable_d1 %>%
          mutate(across(all_of(names(cortable_d1)[flips]), ~-.x))
      }

      r1[[k]] <- cortable_d1

      cortable_d2 <- bind_cols(cormatrix_d2) %>%
        rename(variable = term...1) %>%           # rename first variable name column
        select(!starts_with("term")) %>%          # drop duplicate variable name columns
        relocate(Pooled, .after = variable) %>%
        arrange(-Pooled)

      r <- sapply(3:14, function(i) mean(sign(cortable_d2[,3]) == sign(cortable_d2[,i]), na.rm=TRUE))

      flips <- (3:14)[which(r < .5)]
      if(length(flips) > 0){
        cortable_d2 <- cortable_d2 %>%
          mutate(across(all_of(names(cortable_d2)[flips]), ~-.x))
      }

      r2[[k]] <- cortable_d2
      i <- i+1
      k <- k+1
      }
    }
  }
} # end simulation loop

load("temp/chi_baseline.rda")

data <- read.dta13("source/chi_ward_councilors.dta") %>%
  select(starts_with("v_") | starts_with("q_") | starts_with("s_") | termlabel | session | alderman_id ) %>%
  rename(name = alderman_id)
data1 <-  chi_baseline %>%
  right_join(data)

terms <- c("Pooled","1979-1983","1983-1987","1987-1987","1987-1989","1989-1991","1991-1995","1995-1999","1999-2003","2003-2007","2007-2011","2011-2015","2015-2019")


byterm <- split(data1, f = data1$termlabel)
byterm[["Pooled"]] <- data1  # add whole dataset as "pooled" item


cormatrix <- NULL
cormatrix_d1 <- NULL
cormatrix_d2 <- NULL

for(t in terms){

  byterm[[t]] <- byterm[[t]] %>%
    select(-c(name, session, termlabel)) # drop termlabel string variable from byterm to enable correlation

  cormatrix[[t]] <- correlate(byterm[[t]], quiet = TRUE) # correlate entire table

  cormatrix_d1[[t]] <- cormatrix[[t]] %>% # isolate correlations with Dim1_gs
    focus(Dim1) %>%
    filter(!grepl("Dim2", term)) %>%
    rename(!!paste0(t) := Dim1)        # rename coefficient variable to termlabel

  cormatrix_d2[[t]] <- cormatrix[[t]] %>% # isolate correlations with Dim2_gs
    focus(Dim2) %>%
    filter(!grepl("Dim1", term)) %>%
    rename(!!paste0(t) := Dim2)        # rename coefficient variable to termlabel

}

# Build Dim1 correlations by session
# join on variable names to make table across terms

cortable_d1 <- bind_cols(cormatrix_d1) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable)

r <- sapply(3:14, function(i)mean(sign(cortable_d1[,3]) == sign(cortable_d1[,i]), na.rm=TRUE))

flips <- (3:14)[which(r < .5)]
if(length(flips) > 0){
  cortable_d1 <- cortable_d1 %>%
    mutate(across(all_of(names(cortable_d1)[flips]), ~-.x))
}

baseline_d1 <- cortable_d1

# Build Dim2 correlations by session
# join on variable names to make table across terms

cortable_d2 <- bind_cols(cormatrix_d2) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable)

r <- sapply(3:14, function(i) mean(sign(cortable_d2[,3]) == sign(cortable_d2[,i]), na.rm=TRUE))

flips <- (3:14)[which(r < .5)]
if(length(flips) > 0){
  cortable_d2 <- cortable_d2 %>%
    mutate(across(all_of(names(cortable_d2)[flips]), ~-.x))
}

baseline_d2 <- cortable_d2


r1_comp <- r2_comp <- list()
k <- 1
for(i in seq_along(r1)){
  r1_comp[[k]] <- baseline_d1 %>% pivot_longer(-variable) %>% rename(baseline = value) %>% left_join(r1[[i]] %>% pivot_longer(-variable))
  r2_comp[[k]] <- baseline_d2 %>% pivot_longer(-variable) %>% rename(baseline = value) %>% left_join(r2[[i]] %>% pivot_longer(-variable))
  k <- k+1
}

r1_comp <- r1_comp %>% bind_rows(.id="sim")
r2_comp <- r2_comp %>% bind_rows(.id="sim")

r1_comp <- r1_comp %>%
  mutate(baseline_cat = cut(abs(baseline),
                            breaks=seq(0,1, by=.1),
                            include.lowest = TRUE,
                            right = TRUE))

r2_comp <- r2_comp %>%
  mutate(baseline_cat = cut(abs(baseline),
                            breaks=seq(0,1, by=.1),
                            include.lowest = TRUE,
                            right = TRUE))

r1_comp <- r1_comp %>%
  mutate(baseline_pt = ifelse(sim == 1, baseline, NA))
r2_comp <- r2_comp %>%
  mutate(baseline_pt = ifelse(sim == 1, baseline, NA))

figures[['figE1chi_r1']] <- ggplot(r1_comp %>% na.omit(),
                                   aes(x=abs(value), y=baseline_cat)) +
  geom_density_ridges(scale=.9, fill="gray90") +
  geom_point(aes(x=abs(baseline_pt)), position=position_jitter(height=.1), alpha=.25) +
  theme_classic() +
  labs(x="Absolute Correlations from Simulation",
       y = "Absolute Correlation of Original")

figures[['figE1chi_r2']] <- ggplot(r2_comp %>% na.omit(),
                                   aes(x=abs(value), y=baseline_cat)) +
  geom_density_ridges(scale=.9, fill="gray90") +
  geom_point(aes(x=abs(baseline_pt)), position=position_jitter(height=.1), alpha=.25) +
  theme_classic() +
  labs(x="Absolute Correlations from Simulation",
       y = "Absolute Correlation of Original")



# NOMINATE Diagnostic Results Original and Curated Data ----

load("source/chi_rollcalls.rda")
h_all <- data.frame(name = rownames(rcmat), party="")
chi_votes <- list()
for(i in 1:12){
  tmpX <- rcmat[,which(trm == i)]
  small_col <- apply(tmpX, 2, function(x)sum(!is.na(x), na.rm=TRUE))
  if(any(small_col < .5*max(small_col))){
    tmpX <- tmpX[, -which(small_col < .5*max(small_col, na.rm=TRUE))]
  }
  small_vote <- apply(tmpX, 1, function(x)sum(!is.na(x)))
  ld <- h_all[-which(small_vote < 25), ]
  ld$party <- "none"
  ld <- ld %>% select(name, party) %>% as.data.frame()
  tmpX <- tmpX[-which(small_vote < 25), ]
  sdx <- apply(tmpX, 2, sd, na.rm=TRUE)
  if(any(sdx == 0)){
    tmpX <- tmpX[,-which(sdx == 0)]
  }
  chi_votes[[i]] <- rollcall(tmpX, legis.names=ld$name, legis.data = ld, desc = paste0("Chicago City Council Votes for Session ", i))
}

absence <- lapply(chi_votes, function(x)rowMeans(is.na(x$votes)))
mean(c(unlist(absence)))
absence_n <- lapply(chi_votes, function(x)rowSums(is.na(x$votes)))
sum(c(unlist(absence_n)))


dw_chi <- dwnominate(chi_votes, minvotes=5, lop=.01)

chi_wnres <- list()
for(i in 1:12){
  chi_wnres[[i]] <- dwnominate:::auto_wnominate(chi_votes[[i]], minvotes=5, lop=.01)
}

for(i in 1:12){
  chi_wnres[[i]]$legislators$session <- i
}

chi_rc <- rollcall(rcmat, legis.names=h_all$name, legis.data = h_all)

chi_wn_one <- dwnominate:::auto_wnominate(chi_rc, minvotes=5, lop=.01)

load("temp/chi_legr_out.rda")

chi_curated_d1 <- chi_legr_out$dats[[1]]$dat$rc
rownames(chi_curated_d1) <- chi_legr_out$dats[[1]]$id$name
colnames(chi_curated_d1) <- paste0("V", 1:ncol(chi_curated_d1))
chi_curated_d1[which(chi_curated_d1 == 0, arr.ind=TRUE)] <- NA
chi_curated_d1[which(chi_curated_d1 == -1, arr.ind=TRUE)] <- 0
chi_curated_d2 <- chi_legr_out$dats[[2]]$dat$rc
rownames(chi_curated_d2) <- chi_legr_out$dats[[2]]$id$name
colnames(chi_curated_d2) <- paste0("V", ncol(chi_curated_d1)+c(1:ncol(chi_curated_d2)))
chi_curated_d2[which(chi_curated_d2 == 0, arr.ind=TRUE)] <- NA
chi_curated_d2[which(chi_curated_d2 == -1, arr.ind=TRUE)] <- 0
chi_curated_all <- full_join(as_tibble(chi_curated_d1, rownames="name"), 
                             as_tibble(chi_curated_d2, rownames="name"))
nms <- chi_curated_all$name
chi_curated_all <- as.matrix(chi_curated_all[,-1])
rownames(chi_curated_all) <- nms
chi_t1 <- chi_legr_out$dats[[1]]$dat$bill.session+1
chi_t2 <- chi_legr_out$dats[[2]]$dat$bill.session+1
chi_tall <- c(chi_t1, chi_t2)

chi_ld1 <- data.frame(name = rownames(chi_curated_d1), party="none")
chi_ld2 <- data.frame(name = rownames(chi_curated_d2), party="none")
chi_lda <- data.frame(name = rownames(chi_curated_all), party="none")
chi_c1 <- chi_c2 <- chi_c_all <- list()
for(i in 1:12){
  tmpX1 <- chi_curated_d1[,which(chi_t1 == i)]
  tmpX2 <- chi_curated_d2[,which(chi_t2 == i)]
  tmpXa <- chi_curated_all[,which(chi_tall == i)]
  small_col1 <- apply(tmpX1, 2, function(x)sum(!is.na(x), na.rm=TRUE))
  small_col2 <- apply(tmpX2, 2, function(x)sum(!is.na(x), na.rm=TRUE))
  small_cola <- apply(tmpXa, 2, function(x)sum(!is.na(x), na.rm=TRUE))
  if(any(small_col1 < .5*max(small_col1))){
    tmpX1 <- tmpX1[, -which(small_col1 < .5*max(small_col1, na.rm=TRUE))]
  }
  if(any(small_col2 < .5*max(small_col2))){
    tmpX2 <- tmpX2[, -which(small_col2 < .5*max(small_col2, na.rm=TRUE))]
  }
  if(any(small_cola < .5*max(small_cola))){
    tmpXa <- tmpXa[, -which(small_cola < .5*max(small_cola, na.rm=TRUE))]
  }
  
  small_vote1 <- apply(tmpX1, 1, function(x)sum(!is.na(x)))
  small_vote2 <- apply(tmpX2, 1, function(x)sum(!is.na(x)))
  small_votea <- apply(tmpXa, 1, function(x)sum(!is.na(x)))
  ld1 <- chi_ld1[-which(small_vote1 < 10), ]
  ld2 <- chi_ld2[-which(small_vote2 < 10), ]
  lda <- chi_lda[-which(small_votea < 10), ]
  #  ld1$party <- ld2$party <- lda$party <- "none"
  ld1 <- ld1 %>% select(name, party) %>% as.data.frame()
  ld2 <- ld2 %>% select(name, party) %>% as.data.frame()
  lda <- lda %>% select(name, party) %>% as.data.frame()
  tmpX1 <- tmpX1[-which(small_vote1 < 10), ]
  tmpX2 <- tmpX2[-which(small_vote2 < 10), ]
  tmpXa <- tmpXa[-which(small_votea < 10), ]
  sdx1 <- apply(tmpX1, 2, sd, na.rm=TRUE)
  sdx2 <- apply(tmpX2, 2, sd, na.rm=TRUE)
  sdxa <- apply(tmpXa, 2, sd, na.rm=TRUE)
  if(any(sdx1 == 0)){
    tmpX1 <- tmpX1[,-which(sdx1 == 0)]
  }
  if(any(sdx2 == 0)){
    tmpX2 <- tmpX2[,-which(sdx2 == 0)]
  }
  if(any(sdxa == 0)){
    tmpXa <- tmpXa[,-which(sdxa == 0)]
  }
  
  chi_c1[[i]] <- rollcall(tmpX1, legis.names=ld1$name, legis.data = ld1, desc = paste0("Toronto City Council Votes for Session ", i))
  chi_c2[[i]] <- rollcall(tmpX2, legis.names=ld2$name, legis.data = ld2, desc = paste0("Toronto City Council Votes for Session ", i))
  chi_c_all[[i]] <- rollcall(tmpXa, legis.names=lda$name, legis.data = lda, desc = paste0("Toronto City Council Votes for Session ", i))
}


# dw_chi_curated_d1 <- dwnominate(chi_c1, minvotes=5, lop=.01, dims = 1)
# dw_chi_curated_d2 <- dwnominate(chi_c2, minvotes=5, lop=.01, dims = 1, minvotes=5)
dw_chi_curated_dall <- dwnominate(chi_c_all, minvotes=5, lop=.01, dims = 2)

save(chi_wnres, chi_wn_one, dw_chi, dw_chi_curated_dall, 
     file="temp/chicago_nominate.rda")

load("temp/chicago_nominate.rda")
chi_dw_latent <- dw_chi$legislators %>% select(session, name, coord1D, coord2D) %>% rename(Dim1_dw=coord1D, Dim2_dw=coord2D)
chi_wn1_latent <- chi_wn_one$legislators %>% select(name, coord1D, coord2D) %>% rename(Dim1_wn1=coord1D, Dim2_wn1=coord2D) 

chi_wn_latent <- purrr::map(chi_wnres, \(x)x$legislators %>% select(c(name, coord1D, coord2D)) %>% rename(Dim1_wn=coord1D, Dim2_wn=coord2D)) %>% 
  bind_rows(.id="session")
rownames(chi_wn_latent) <- NULL
chi_dw_call_latent <- dw_chi_curated_dall$legislators %>% select(session, name, coord1D, coord2D) %>% rename(Dim1_dwca=coord1D, Dim2_dwca=coord2D)
chi_dw_call_latent <- chi_dw_call_latent %>% arrange(session, name)

tmp2 <- chi_dw_call_latent %>% 
  setNames(c('session', 'name', 'Dim1', 'Dim2')) %>% 
  dplyr::group_split(session) %>% 
  purrr::map(function(x){
    o <- legR::orthogonalize(x)
    o <- o %>% dplyr::select(-Dim1, -Dim2) %>% dplyr::rename(Dim1_dwca=Dim1_gs, Dim2_dwca=Dim2_gs)
    o})
chi_dw_call_latent <- bind_rows(tmp2)


chi_latent <- full_join(chi_dw_latent, chi_wn_latent %>% mutate(session = as.numeric(session)))
chi_latent <- left_join(chi_latent, chi_wn1_latent)
chi_latent <- left_join(chi_latent, chi_dw_call_latent)

chi_latent <- chi_latent %>% mutate(across(c(Dim1_wn1, Dim2_wn), ~-.x))
save(chi_latent, file="temp/chi_latent.rda")



load("source/chi_rollcalls.rda")

pb <- progress_bar$new(total = ncol(rcmat))
res <- NULL
for(i in 1:ncol(rcmat)){
  pb$tick()
  tmp.trm <- trm[i]
  dat <- tibble(vote = rcmat[,i], name = rownames(rcmat))
  lat <- chi_latent %>% filter(session == tmp.trm)
  dat <- inner_join(dat, lat, by="name") %>% na.omit()
  if(sum(dat$vote == 1, na.rm=TRUE) > 0 & sum(dat$vote == 0, na.rm=TRUE) > 0){
    m1a <- suppressWarnings(glm(vote ~ Dim1_dw, data=dat, family=binomial))
    m2a <- suppressWarnings(glm(vote ~ Dim2_dw, data=dat, family=binomial))
    m12a <- suppressWarnings(glm(vote ~ Dim1_dw + Dim2_dw, data=dat, family=binomial))
    m1b <- suppressWarnings(glm(vote ~ Dim1_wn, data=dat, family=binomial))
    m2b <- suppressWarnings(glm(vote ~ Dim2_wn, data=dat, family=binomial))
    m12b <- suppressWarnings(glm(vote ~ Dim1_wn + Dim2_wn, data=dat, family=binomial))
    m1c <- suppressWarnings(glm(vote ~ Dim1_wn1, data=dat, family=binomial))
    m2c <- suppressWarnings(glm(vote ~ Dim2_wn1, data=dat, family=binomial))
    m12c <- suppressWarnings(glm(vote ~ Dim1_wn1 + Dim2_wn1, data=dat, family=binomial))
    m1e <- suppressWarnings(glm(vote ~ Dim1_dwca, data=dat, family=binomial))
    m2e <- suppressWarnings(glm(vote ~ Dim2_dwca, data=dat, family=binomial))
    m12e <- suppressWarnings(glm(vote ~ Dim1_dwca + Dim2_dwca, data=dat, family=binomial))
    ce1a <- sum(m1a$y != round(m1a$fitted))
    ce2a <- sum(m2a$y != round(m2a$fitted))
    ce12a <- sum(m12a$y != round(m12a$fitted))
    ce1b <- sum(m1b$y != round(m1b$fitted))
    ce2b <- sum(m2b$y != round(m2b$fitted))
    ce12b <- sum(m12b$y != round(m12b$fitted))
    ce1c <- sum(m1c$y != round(m1c$fitted))
    ce2c <- sum(m2c$y != round(m2c$fitted))
    ce12c <- sum(m12c$y != round(m12c$fitted))
    ce1e <- sum(m1e$y != round(m1e$fitted))
    ce2e <- sum(m2e$y != round(m2e$fitted))
    ce12e <- sum(m12e$y != round(m12e$fitted))
    minvotes <- min(table(na.omit(dat$vote)))
    nvotes <- sum(!is.na(dat$vote))
    res <- rbind(res, data.frame(session=tmp.trm, 
                                 minvote = minvotes, 
                                 ce1_dw = ce1a, 
                                 ce2_dw = ce2a, 
                                 ce12_dw = ce12a,
                                 ce1_wn = ce1b, 
                                 ce2_wn = ce2b, 
                                 ce12_wn = ce12b,
                                 ce1_wn1 = ce1c, 
                                 ce2_wn1 = ce2c, 
                                 ce12_wn1 = ce12c,
                                 ce1_dwca = ce1e, 
                                 ce2_dwca = ce2e, 
                                 ce12_dwca = ce12e, 
                                 n=nvotes))
  }
}
chi_dw_apre_by_term  <- res %>% group_by(session) %>% 
  summarise(apre1_dw = sum(minvote - ce1_dw)/sum(minvote), 
            apre2_dw = sum(minvote - ce2_dw)/sum(minvote), 
            apre12_dw = sum(minvote - ce12_dw)/sum(minvote)) %>% 
  mutate(apre_diff_dw = apre12_dw-apre1_dw)

chi_dw_apre <- res %>% 
  summarise(apre1_dw = sum(minvote - ce1_dw, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre2_dw = sum(minvote - ce2_dw, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre12_dw = sum(minvote - ce12_dw, na.rm=TRUE)/sum(minvote, na.rm=TRUE)) %>% 
  mutate(apre_diff_dw = apre12_dw-apre1_dw)

chi_wn_apre_by_term  <- res %>% group_by(session) %>% 
  summarise(apre1_wn = sum(minvote - ce1_wn)/sum(minvote), 
            apre2_wn = sum(minvote - ce2_wn)/sum(minvote), 
            apre12_wn = sum(minvote - ce12_wn)/sum(minvote)) %>% 
  mutate(apre_diff_wn = apre12_wn-apre1_wn)

chi_wn_apre <- res %>% 
  summarise(apre1_wn = sum(minvote - ce1_wn, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre2_wn = sum(minvote - ce2_wn, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre12_wn = sum(minvote - ce12_wn, na.rm=TRUE)/sum(minvote, na.rm=TRUE)) %>% 
  mutate(apre_diff_wn = apre12_wn-apre1_wn)

chi_wn1_apre_by_term  <- res %>% group_by(session) %>% 
  summarise(apre1_wn1 = sum(minvote - ce1_wn1)/sum(minvote), 
            apre2_wn1 = sum(minvote - ce2_wn1)/sum(minvote), 
            apre12_wn1 = sum(minvote - ce12_wn1)/sum(minvote)) %>% 
  mutate(apre_diff_wn1 = apre12_wn1-apre1_wn1)

chi_wn1_apre <- res %>% 
  summarise(apre1_wn1 = sum(minvote - ce1_wn1, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre2_wn1 = sum(minvote - ce2_wn1, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre12_wn1 = sum(minvote - ce12_wn1, na.rm=TRUE)/sum(minvote, na.rm=TRUE)) %>% 
  mutate(apre_diff_wn1 = apre12_wn1-apre1_wn1)


chi_nom_apre <- bind_cols(bind_rows(chi_dw_apre %>% mutate(session = 0) %>% select(session, everything()), chi_dw_apre_by_term),
                          bind_rows(chi_wn_apre %>% mutate(session = 0) %>% select(session, everything()), chi_wn_apre_by_term) %>% select(-session),
                          bind_rows(chi_wn1_apre %>% mutate(session = 0) %>% select(session, everything()), chi_wn1_apre_by_term) %>% select(-session)) %>% 
  mutate(city = "Chicago", 
         session = factor(session, 
                          levels= 0:12, 
                          labels= c("Overall", "1979-83 Byrne","1983-87 Washington","1987 Washington",
                                    "1987-89 Sawyer", "1989-91 Daley", "1991-95 Daley", "1995-99 Daley",
                                    "1999-2003 Daley","2003-07 Daley","2007-11 Daley","2011-15 Emanuel",
                                    "2015-19 Emanuel"))) %>% 
  select(-starts_with("apre2"), -starts_with("apre_d")) %>% 
  mutate(apre12_dw = apre12_dw-apre1_dw, 
         apre12_wn = apre12_wn-apre1_wn, 
         apre12_wn1 = apre12_wn1-apre1_wn1) %>% 
  pivot_longer(starts_with("apre1"), names_pattern = "(.*)_(.*)", names_to=c("dimension", "model"), values_to="apre") %>% 
  mutate(dimension = factor(dimension, 
                            levels=c("apre12", "apre1"), 
                            labels=c("Dimension 1+2", "Dimension 1")), 
         model = factor(model, levels=c("dw", "wn", "wn1"), 
                        labels=c("DW-NOMINATE", "W-NOMINATE (term-by-term)", "W-NOMINATE (one-shot)"))) %>% 
  mutate(session_num = as.numeric(session)-1, 
         session_num = ifelse(session_num  ==0, -.5, session_num))



save(chi_nom_apre, file="temp/chi_nom_apre.rda")


chi_dwca_apre <- res %>% 
  summarise(apre1_dwca = sum(minvote - ce1_dwca, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre2_dwca = sum(minvote - ce2_dwca, na.rm=TRUE)/sum(minvote, na.rm=TRUE), 
            apre12_dwca = sum(minvote - ce12_dwca, na.rm=TRUE)/sum(minvote, na.rm=TRUE)) %>% 
  mutate(apre_diff_dwca = apre12_dwca-apre1_dwca)



cat("DW-NOMINATE RESULTS")
sprintf("PCP Dim 1: %.0f%%",( (sum(res$n) - sum(res$ce1_dw))/sum(res$n))*100)
sprintf("PCP Dim 2: %.0f%%",( (sum(res$n) - sum(res$ce2_dw))/sum(res$n))*100)
sprintf("PCP Dim 1+2: %.0f%%",( (sum(res$n) - sum(res$ce12_dw))/sum(res$n))*100)

cat("W-NOMINATE (TERM-BY-TERM) RESULTS")
sprintf("PCP Dim 1: %.0f%%",( (sum(res$n) - sum(res$ce1_wn))/sum(res$n))*100)
sprintf("PCP Dim 2: %.0f%%",( (sum(res$n) - sum(res$ce2_wn))/sum(res$n))*100)
sprintf("PCP Dim 1+2: %.0f%%",( (sum(res$n) - sum(res$ce12_wn))/sum(res$n))*100)

cat("W-NOMINATE (ONE-SHOT) RESULTS")
sprintf("PCP Dim 1: %.0f%%",( (sum(res$n) - sum(res$ce1_wn1))/sum(res$n))*100)
sprintf("PCP Dim 2: %.0f%%",( (sum(res$n) - sum(res$ce2_wn1))/sum(res$n))*100)
sprintf("PCP Dim 1+2: %.0f%%",( (sum(res$n) - sum(res$ce12_wn1))/sum(res$n))*100)


cat("Percentage of PRE\n")
chi_nom_apre %>% 
  filter(dimension %in% c("Dimension 1", "Dimension 1+2")) %>% 
  select(session, model, dimension, apre) %>% 
  group_by(session, model) %>% 
  reframe(pct1 = apre[1]/sum(apre)*100, 
          pct2 = apre[2]/sum(apre)*100) %>%
  as.data.frame()

figures[['chi_nom_apre']] <- chi_nom_apre %>% 
  ggplot(aes(y=apre, fill=dimension)) + 
  geom_bar(aes(x=session_num), stat="identity")  + 
  theme_bw() + 
  labs(x="", y="APRE", fill="") + 
  theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.25), 
        text = element_text(size = 8),
        legend.position="top", 
        legend.background = element_blank(), 
        legend.key = element_rect(fill = "transparent", colour = "transparent"), 
        panel.grid=element_blank()) + 
  scale_fill_manual(values = c("grey70", "grey30")) +
  scale_y_continuous(labels=label_percent(accuracy=1)) + 
  scale_x_continuous(breaks = c(-.5, 1:12), labels=levels(chi_nom_apre$session)) + 
  guides(fill=guide_legend(nrow=1,byrow=TRUE)) + 
  facet_wrap(~model, ncol=1)

## correlation table figures
data <- read.dta13("source/chi_ward_councilors.dta") %>%
  select(starts_with("v_") | starts_with("q_") | starts_with("s_") | termlabel | session | alderman_id ) %>% 
  rename(name = alderman_id) 


data_dw <- data %>% 
  left_join(chi_latent %>% select(name, session, Dim1_dw, Dim2_dw)) %>% 
  select(-session, -name)

data_wn <- data %>% 
  left_join(chi_latent %>% select(name, session, Dim1_wn, Dim2_wn)) %>% 
  select(-session, -name)

data_wn1 <- data %>% 
  left_join(chi_latent %>% select(name, session, Dim1_wn1, Dim2_wn1)) %>% 
  select(-session, -name)

data_dwca <- data %>% 
  left_join(chi_latent %>% select(name, session, Dim1_dwca, Dim2_dwca)) %>% 
  select(-session, -name)


rownames(data_dw) <- rownames(data_wn) <- rownames(data_wn1) <- rownames(data_dwca) <- NULL

varname_lookup <- rio::import("source/varname_lookup.xlsx") 
varname_lookup <- bind_rows(varname_lookup, tibble(variable = "v_lfosci", varname = "% Science/Tech Jobs", vartype="status"))

byterm_dw <- split(data_dw, f = data_dw$termlabel) 
byterm_wn <- split(data_wn, f = data_wn$termlabel) 
byterm_wn1 <- split(data_wn1, f = data_wn1$termlabel) 
byterm_dwca <- split(data_dwca, f = data_dwca$termlabel) 

byterm_dw[["Pooled"]] <- data_dw  # add whole dataset as "pooled" item  
byterm_wn[["Pooled"]] <- data_wn  # add whole dataset as "pooled" item  
byterm_wn1[["Pooled"]] <- data_wn1  # add whole dataset as "pooled" item  
byterm_dwca[["Pooled"]] <- data_dwca  # add whole dataset as "pooled" item  


terms <- c("Pooled","1979-1983","1983-1987","1987-1987","1987-1989","1989-1991","1991-1995","1995-1999","1999-2003","2003-2007","2007-2011","2011-2015","2015-2019")

cormatrix_dw <- list()
cormatrix_d1_dw <- list()
cormatrix_d2_dw <- list()
cormatrix_wn <- list()
cormatrix_d1_wn <- list()
cormatrix_d2_wn <- list()
cormatrix_wn1 <- list()
cormatrix_d1_wn1 <- list()
cormatrix_d2_wn1 <- list()
cormatrix_dwca <- list()
cormatrix_d1_dwca <- list()
cormatrix_d2_dwca <- list()

for(t in terms){
  byterm_dw[[t]] <- byterm_dw[[t]] %>% select(-termlabel) # drop termlabel string variable from byterm to enable correlation
  cormatrix_dw[[t]] <- correlate(byterm_dw[[t]], quiet = TRUE) # correlate entire table
  byterm_wn[[t]] <- byterm_wn[[t]] %>% select(-termlabel) # drop termlabel string variable from byterm to enable correlation
  cormatrix_wn[[t]] <- correlate(byterm_wn[[t]], quiet = TRUE) # correlate entire table
  byterm_wn1[[t]] <- byterm_wn1[[t]] %>% select(-termlabel) # drop termlabel string variable from byterm to enable correlation
  cormatrix_wn1[[t]] <- correlate(byterm_wn1[[t]], quiet = TRUE) # correlate entire table
  cormatrix_d1_dw[[t]] <- cormatrix_dw[[t]] %>% # isolate correlations with Dim1_gs
    focus(Dim1_dw) %>%
    rename(!!paste0(t) := Dim1_dw)        # rename coefficient variable to termlabela
  cormatrix_d2_dw[[t]] <- cormatrix_dw[[t]] %>% # isolate correlations with Dim2_gs
    focus(Dim2_dw) %>%
    rename(!!paste0(t) := Dim2_dw)        # rename coefficient variable to termlabel
  cormatrix_d1_wn[[t]] <- cormatrix_wn[[t]] %>% # isolate correlations with Dim1_gs
    focus(Dim1_wn) %>%
    rename(!!paste0(t) := Dim1_wn)        # rename coefficient variable to termlabela
  cormatrix_d2_wn[[t]] <- cormatrix_wn[[t]] %>% # isolate correlations with Dim2_gs
    focus(Dim2_wn) %>%
    rename(!!paste0(t) := Dim2_wn)        # rename coefficient variable to termlabel
  cormatrix_d1_wn1[[t]] <- cormatrix_wn1[[t]] %>% # isolate correlations with Dim1_gs
    focus(Dim1_wn1) %>%
    rename(!!paste0(t) := Dim1_wn1)        # rename coefficient variable to termlabela
  cormatrix_d2_wn1[[t]] <- cormatrix_wn1[[t]] %>% # isolate correlations with Dim2_gs
    focus(Dim2_wn1) %>%
    rename(!!paste0(t) := Dim2_wn1)        # rename coefficient variable to termlabel
  byterm_dwca[[t]] <- byterm_dwca[[t]] %>% select(-termlabel) # drop termlabel string variable from byterm to enable correlation
  cormatrix_dwca[[t]] <- correlate(byterm_dwca[[t]], quiet = TRUE) # correlate entire table
  cormatrix_d1_dwca[[t]] <- cormatrix_dwca[[t]] %>% # isolate correlations with Dim1_gs
    focus(Dim1_dwca) %>%
    rename(!!paste0(t) := Dim1_dwca)        # rename coefficient variable to termlabela
  cormatrix_d2_dwca[[t]] <- cormatrix_dwca[[t]] %>% # isolate correlations with Dim2_gs
    focus(Dim2_dwca) %>%
    rename(!!paste0(t) := Dim2_dwca)        # rename coefficient variable to termlabel
  
  
}

# drop coefficients >= -.2 | <= .2
# drop if more than 3 NAs in row, Dim variables

cortable_d1_dw <- bind_cols(cormatrix_d1_dw) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)

cortable_d2_dw <- bind_cols(cormatrix_d2_dw) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)

cortable_d1_wn <- bind_cols(cormatrix_d1_wn) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)

cortable_d2_wn <- bind_cols(cormatrix_d2_wn) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)

cortable_d1_wn1 <- bind_cols(cormatrix_d1_wn1) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)

cortable_d2_wn1 <- bind_cols(cormatrix_d2_wn1) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)

cortable_d1_dwca <- bind_cols(cormatrix_d1_dwca) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)

cortable_d2_dwca <- bind_cols(cormatrix_d2_dwca) %>%
  rename(variable = term...1) %>%           # rename first variable name column
  select(!starts_with("term")) %>%          # drop duplicate variable name columns
  relocate(Pooled, .after = variable) %>% 
  dplyr::arrange(-Pooled) %>% 
  filter(!str_detect(variable, "legR")) %>% 
  left_join(varname_lookup, by = "variable") %>%
  relocate(c(vartype, varname), .after = variable) %>%
  mutate_if(is.numeric, round, 2) %>% 
  select(-variable)



### Chicago, D1 DW-NOMINATE ----
tmp1_dw <- cortable_d1_dw %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:15, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:15)))) %>% 
  ungroup %>% 
  filter(count_na <= 2) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp1_dw$np))-.5
tmp1_dw <- tmp1_dw %>% 
  pivot_longer(3:15, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp1_dw %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))

figures[['chi_cor_d1_dw']] <- ggplot(tmp1_dw, aes(x=period, y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1), 
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=c("gray40", "gray70", "white", "gray70", "gray40")) + 
  scale_y_continuous(breaks=seq_along(levels(tmp1_dw$fac)), labels=levels(tmp1_dw$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp1_dw$fac)), labels=vt$vartype)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", ylim=c(.5, max(as.numeric(tmp1_dw$fac))+.5), expand=FALSE) 

### Chicago, D2 DW-NOMINATE ----
colvals <- c("gray40", "gray70", "white", "gray70", "gray40")

tmp2_dw <- cortable_d2_dw %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:15, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:15)))) %>% 
  ungroup %>% 
  filter(count_na <= 5) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp2_dw$np))+.5
tmp2_dw <- tmp2_dw %>% 
  pivot_longer(3:15, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp2_dw %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))
bgtab <- table(tmp2_dw$bg)
if(any(bgtab == 0))colvals <- colvals[-which(bgtab == 0)]

figures[['chi_cor_d2_dw']] <- ggplot(tmp2_dw, aes(x=as.numeric(period), y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1),
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=colvals) + 
  scale_y_continuous(breaks=seq_along(levels(tmp2_dw$fac)), labels=levels(tmp2_dw$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp2_dw$fac)), labels=vt$vartype)) + 
  scale_x_continuous(breaks = 1:13, labels=levels(tmp2_dw$period)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", xlim = c(.5, 13.5), ylim=c(.5, max(as.numeric(tmp2_dw$fac))+.5), expand=FALSE) 

### Chicago, D1 W-NOMINATE (term by term) ----
tmp1_wn <- cortable_d1_wn %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:15, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:15)))) %>% 
  ungroup %>% 
  filter(count_na <= 2) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp1_wn$np))-.5
tmp1_wn <- tmp1_wn %>% 
  pivot_longer(3:15, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp1_wn %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))

figures[['chi_cor_d1_wn']] <- ggplot(tmp1_wn, aes(x=period, y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1), 
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=c("gray40", "gray70", "white", "gray70", "gray40")) + 
  scale_y_continuous(breaks=seq_along(levels(tmp1_wn$fac)), labels=levels(tmp1_wn$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp1_wn$fac)), labels=vt$vartype)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", ylim=c(.5, max(as.numeric(tmp1_wn$fac))+.5), expand=FALSE) 

### Chicago, D2 W-NOMINATE (term by term) ----
colvals <- c("gray40", "gray70", "white", "gray70", "gray40")

tmp2_wn <- cortable_d2_wn %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:15, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:15)))) %>% 
  ungroup %>% 
  filter(count_na <= 5) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp2_wn$np))+.5
tmp2_wn <- tmp2_wn %>% 
  pivot_longer(3:15, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp2_wn %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))
bgtab <- table(tmp2_wn$bg)
if(any(bgtab == 0))colvals <- colvals[-which(bgtab == 0)]

figures[['chi_cor_d2_wn']] <- ggplot(tmp2_wn, aes(x=as.numeric(period), y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1),
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=colvals) + 
  scale_y_continuous(breaks=seq_along(levels(tmp2_wn$fac)), labels=levels(tmp2_wn$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp2_wn$fac)), labels=vt$vartype)) + 
  scale_x_continuous(breaks = 1:13, labels=levels(tmp2_wn$period)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", xlim = c(.5, 13.5), ylim=c(.5, max(as.numeric(tmp2_wn$fac))+.5), expand=FALSE) 

### Chicago, D1 W-NOMINATE (one shot) ----
tmp1_wn1 <- cortable_d1_wn1 %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:15, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:15)))) %>% 
  ungroup %>% 
  filter(count_na <= 5) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp1_wn1$np))-.5
tmp1_wn1 <- tmp1_wn1 %>% 
  pivot_longer(3:15, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp1_wn1 %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))

figures[['chi_cor_d1_wn1']] <- ggplot(tmp1_wn1, aes(x=period, y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1), 
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=c("gray40", "gray70", "white", "gray70", "gray40")) + 
  scale_y_continuous(breaks=seq_along(levels(tmp1_wn1$fac)), labels=levels(tmp1_wn1$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp1_wn1$fac)), labels=vt$vartype)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", ylim=c(.5, max(as.numeric(tmp1_wn1$fac))+.5), expand=FALSE) 

### Chicago, D2 W-NOMINATE (one shot) ----
colvals <- c("gray40", "gray70", "white", "gray70", "gray40")

tmp2_wn1 <- cortable_d2_wn1 %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:15, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:15)))) %>% 
  ungroup %>% 
  filter(count_na <= 5) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp2_wn1$np))+.5
tmp2_wn1 <- tmp2_wn1 %>% 
  pivot_longer(3:15, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp2_wn1 %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))
bgtab <- table(tmp2_wn1$bg)
if(any(bgtab == 0))colvals <- colvals[-which(bgtab == 0)]

figures[['chi_cor_d2_wn1']] <- ggplot(tmp2_wn1, aes(x=as.numeric(period), y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1),
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=colvals) + 
  scale_y_continuous(breaks=seq_along(levels(tmp2_wn1$fac)), labels=levels(tmp2_wn1$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp2_wn1$fac)), labels=vt$vartype)) + 
  scale_x_continuous(breaks = 1:13, labels=levels(tmp2_wn1$period)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", xlim = c(.5, 13.5), ylim=c(.5, max(as.numeric(tmp2_wn1$fac))+.5), expand=FALSE) 



### Chicago, D1 DW-NOMINATE (Curated Variables, one 2D model)----
tmp1_dwca <- cortable_d1_dwca %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:15, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:15)))) %>% 
  ungroup %>% 
  filter(count_na <= 2) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp1_dwca$np))-.5
tmp1_dwca <- tmp1_dwca %>% 
  pivot_longer(3:15, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp1_dwca %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))

figures[['chi_cor_d1_dwca']] <- ggplot(tmp1_dwca, aes(x=period, y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1), 
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=c("gray40", "gray70", "white", "gray70", "gray40")) + 
  scale_y_continuous(breaks=seq_along(levels(tmp1_dwca$fac)), labels=levels(tmp1_dwca$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp1_dwca$fac)), labels=vt$vartype)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", ylim=c(.5, max(as.numeric(tmp1_dwca$fac))+.5), expand=FALSE) 

### Chicago, D2 DW-NOMINATE (Curated Variables, one 2D model) ----
colvals <- c("gray40", "gray70", "white", "gray70", "gray40")

tmp2_dwca <- cortable_d2_dwca %>% 
  dplyr::arrange(Pooled) %>% 
  mutate(np = Pooled < 0, 
         across(3:15, ~ifelse(abs(.x) > .15, .x, NA))) %>% 
  rowwise() %>% 
  mutate(count_na = sum(is.na(c_across(3:15)))) %>% 
  ungroup %>% 
  filter(count_na <= 5) %>% 
  mutate(fac = factor(1:n(), labels=varname)) 
yint <- max(which(tmp2_dwca$np))+.5
tmp2_dwca <- tmp2_dwca %>% 
  pivot_longer(3:15, names_to="period", values_to="vals") %>% 
  mutate(period = factor(period, levels=unique(period)),
         bg = cut(vals, c(-1, -.6, -.4, .4, .6, 1)), 
         bld = ifelse(abs(vals) > .6, "bold", "plain")) %>% 
  na.omit() %>% 
  mutate(fac = droplevels(fac))
vt <- tmp2_dwca %>% 
  group_by(fac) %>% 
  summarise(vartype = first(vartype))
bgtab <- table(tmp2_dwca$bg)
if(any(bgtab == 0))colvals <- colvals[-which(bgtab == 0)]

figures[['chi_cor_d2_dwca']] <- ggplot(tmp2_dwca, aes(x=as.numeric(period), y=as.numeric(fac), fill=bg)) + 
  geom_tile( show.legend = FALSE) + 
  geom_text(aes(label=sprintf("%.2f", vals), fontface=bld), size=3.5) + 
  geom_vline(xintercept=1.5, linetype=2) + 
  geom_hline(yintercept=yint) + 
  theme_bw() + 
  theme(panel.grid=element_blank(), 
        axis.text.x = element_text(size=10, angle=60, hjust=1),
        axis.text.y = element_text(size=10)) + 
  scale_fill_manual(values=colvals) + 
  scale_y_continuous(breaks=seq_along(levels(tmp2_dwca$fac)), labels=levels(tmp2_dwca$fac), 
                     sec.axis = sec_axis(trans = ~ .x, breaks = seq_along(levels(tmp2_dwca$fac)), labels=vt$vartype)) + 
  scale_x_continuous(breaks = 1:13, labels=levels(tmp2_dwca$period)) + 
  labs(x="", y="") + 
  coord_cartesian(clip="off", xlim = c(.5, 13.5), ylim=c(.5, max(as.numeric(tmp2_dwca$fac))+.5), expand=FALSE) 

# Save out figures ----

chi_figures <- figures
save(chi_figures, chi_apre, seed, file="temp/chicago_figures.rda")
save.image("temp/chicago_complete.rda")

closeAllConnections() # Close connection to log file
